import os
import abbrs

# Directory whose entries are the saved article pages; proc() reads one entry
# from here and make_all() iterates over every entry in it.
path = r'G:\Users\01\Downloads\Websites\yedu.renpengpeng.com\info'

def proc(s):
	"""Convert one saved article page into a simplified HTML fragment.

	Args:
		s: Name of an entry inside ``path``. It is passed through ``str()``,
			so non-string names (e.g. integers) are accepted.

	Returns:
		A string consisting of the article title wrapped in ``<h1>``, a blank
		line, and the markup of the ``article-content`` element with the
		audio element, all ``<img>`` tags and all ``<div>`` open/close tags
		removed.

	Raises:
		AttributeError: If the page has no ``article-title`` or
			``article-content`` element (``find`` returns ``None``).
	"""
	from bs4 import BeautifulSoup
	import re
	html = abbrs.read_file(os.path.join(path, str(s)))
	soup = BeautifulSoup(html, features='lxml')
	title = soup.find(class_='article-title').text
	content = soup.find(class_='article-content')
	# find() returns None when nothing matches; pages without an audio
	# element must not abort the whole run.
	audio = content.find(class_='article-audio')
	if audio is not None:
		audio.decompose()
	for img in content.find_all('img'):
		img.decompose()
	markup = str(content)
	# Strip the div tags themselves (including the article-content wrapper)
	# while keeping whatever they contain.
	markup = re.compile('</?div.*?>').sub('', markup)
	return f'<h1>{title}</h1>\n\n{markup}'

def make_all():
	"""Build the full corpus by converting every entry in ``path``.

	Each entry name is printed as it is processed. Entries are visited in
	sorted name order, because ``os.listdir`` returns names in arbitrary
	order and the corpus should be reproducible between runs.

	Returns:
		The concatenation of ``proc(name)`` for every entry, each followed by
		a newline. An empty directory yields an empty string.
	"""
	parts = []
	for name in sorted(os.listdir(path)):
		print(name)
		parts.append(proc(name))
		parts.append('\n')
	# A single join avoids the repeated copying of ``s += ...`` in a loop.
	return ''.join(parts)

if __name__ == '__main__':
	# Script entry point: generate the corpus and hand it to the writer.
	output_name = 'yedu-corpus.txt'
	save = abbrs.write_file
	save(output_name, make_all())